In [1]:
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

data = pd.read_csv("Instagram-Reach.csv", encoding = "latin-1")
print(data.head())
                  Date  Instagram reach
0  2022-04-01T00:00:00             7620
1  2022-04-02T00:00:00            12859
2  2022-04-03T00:00:00            16008
3  2022-04-04T00:00:00            24349
4  2022-04-05T00:00:00            20532
In [2]:
data["Date"] = pd.to_datetime(data["Date"])
print(data.head())
        Date  Instagram reach
0 2022-04-01             7620
1 2022-04-02            12859
2 2022-04-03            16008
3 2022-04-04            24349
4 2022-04-05            20532
In [4]:
#Analyze the trend of Instagram reach over time using a line chart
fig = go.Figure()
fig.add_trace(go.Scatter(x=data["Date"],
                        y=data["Instagram reach"],
                        mode="lines", name="Instagram reach"))
fig.update_layout(title="Instagram Reach Trend", xaxis_title="Date",
                 yaxis_title="Instagram Reach")
fig.show()
In [5]:
#Analyze Instagram reach for each day using a bar chart:
fig = go.Figure()
fig.add_trace(go.Bar(x=data["Date"],
                    y=data["Instagram reach"],
                    name="Instagram reach"))
fig.update_layout(title="Instagram Reach by Day",
                 xaxis_title="Date",
                 yaxis_title="Instagram Reach")
fig.show()
In [7]:
#Analyze the distribution of Instagram reach using a box plot
fig = go.Figure()
fig.add_trace(go.Box(y=data["Instagram reach"],
                    name="Instagram reach"))
fig.update_layout(title="Instagram Reach Box Plot",
                 yaxis_title="Instagram Reach")
fig.show()
                
In [8]:
data["Day"] = data["Date"].dt.day_name()
print(data.head())
        Date  Instagram reach       Day
0 2022-04-01             7620    Friday
1 2022-04-02            12859  Saturday
2 2022-04-03            16008    Sunday
3 2022-04-04            24349    Monday
4 2022-04-05            20532   Tuesday
In [9]:
import numpy as np

day_stats = data.groupby("Day")["Instagram reach"].agg(["mean", "median", "std"]).reset_index()
print(day_stats)
         Day          mean   median           std
0     Friday  46666.849057  35574.0  29856.943036
1     Monday  52621.692308  46853.0  32296.071347
2   Saturday  47374.750000  40012.0  27667.043634
3     Sunday  53114.173077  47797.0  30906.162384
4   Thursday  48570.923077  39150.0  28623.220625
5    Tuesday  54030.557692  48786.0  32503.726482
6  Wednesday  51017.269231  42320.5  29047.869685
In [11]:
#Create a bar chart to visualize the reach for each day of the week
fig= go.Figure()
fig.add_trace(go.Bar(x=day_stats["Day"],
                    y=day_stats["mean"],
                    name="Mean"))
fig.add_trace(go.Bar(x=day_stats["Day"],
                    y=day_stats["median"],
                    name="Median"))
fig.add_trace(go.Bar(x=day_stats['Day'], 
                     y=day_stats['std'], 
                     name='Standard Deviation'))
fig.update_layout(title='Instagram Reach by Day of the Week', 
                  xaxis_title='Day', 
                  yaxis_title='Instagram Reach')
fig.show()
In [13]:
#Look at the Trends and Seasonal patterns of Instagram reach
from plotly.tools import mpl_to_plotly
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

data = data[["Date", "Instagram reach"]]

result = seasonal_decompose(data["Instagram reach"],
                           model="multiplicative",
                           period=100)

fig = plt.figure()
fig = result.plot()

fig = mpl_to_plotly(fig)
fig.show()
<Figure size 640x480 with 0 Axes>
In [14]:
pd.plotting.autocorrelation_plot(data["Instagram reach"])
Out[14]:
<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>
In [ ]:
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(data["Instagram reach"], lags = 100)
In [19]:
p, d, q = 8, 1, 2

import statsmodels.api as sm
import warnings
model=sm.tsa.statespace.SARIMAX(data["Instagram reach"],
                               order=(p, d, q),
                               seasonal_order=(p, d, q, 12))
model=model.fit()
print(model.summary())
C:\Users\Beytullah\anaconda3\lib\site-packages\statsmodels\base\model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                    Instagram reach   No. Observations:                  365
Model:             SARIMAX(8, 1, 2)x(8, 1, 2, 12)   Log Likelihood               -3938.519
Date:                            Mon, 31 Jul 2023   AIC                           7919.039
Time:                                    14:30:07   BIC                           8000.175
Sample:                                         0   HQIC                          7951.327
                                            - 365                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1902      7.117      0.027      0.979     -13.758      14.138
ar.L2          0.4715      6.609      0.071      0.943     -12.482      13.425
ar.L3         -0.1178      1.517     -0.078      0.938      -3.091       2.856
ar.L4          0.0414      0.284      0.146      0.884      -0.515       0.598
ar.L5         -0.0211      0.187     -0.113      0.910      -0.388       0.346
ar.L6          0.0309      0.277      0.111      0.911      -0.513       0.575
ar.L7          0.0086      0.442      0.019      0.984      -0.857       0.874
ar.L8         -0.0135      0.248     -0.055      0.957      -0.500       0.473
ma.L1         -0.2245      7.113     -0.032      0.975     -14.165      13.716
ma.L2         -0.7087      6.831     -0.104      0.917     -14.096      12.679
ar.S.L12      -1.1027      1.516     -0.728      0.467      -4.073       1.868
ar.S.L24      -1.7695      2.255     -0.785      0.433      -6.188       2.649
ar.S.L36      -1.4525      1.937     -0.750      0.453      -5.249       2.344
ar.S.L48      -1.1027      1.581     -0.697      0.486      -4.202       1.997
ar.S.L60      -0.7956      1.130     -0.704      0.481      -3.010       1.419
ar.S.L72      -0.4568      0.798     -0.573      0.567      -2.020       1.106
ar.S.L84      -0.2267      0.506     -0.448      0.654      -1.219       0.765
ar.S.L96      -0.0550      0.247     -0.223      0.824      -0.539       0.429
ma.S.L12       0.2403      1.516      0.158      0.874      -2.732       3.213
ma.S.L24       0.8331      1.307      0.638      0.524      -1.728       3.394
sigma2      4.863e+08   1.69e-07   2.88e+15      0.000    4.86e+08    4.86e+08
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):               214.11
Prob(Q):                              0.93   Prob(JB):                         0.00
Heteroskedasticity (H):               0.72   Skew:                             0.29
Prob(H) (two-sided):                  0.07   Kurtosis:                         6.78
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.62e+31. Standard errors may be unstable.
In [21]:
#Make predictions using the model and have a look at the forecasted reach
predictions = model.predict(len(data), len(data)+100)

trace_train = go.Scatter(x=data.index,
                        y=data["Instagram reach"],
                        mode="lines",
                        name="Training Data")
trace_pred = go.Scatter(x=predictions.index,
                       y=predictions,
                       mode="lines",
                       name="Predictions")

layout = go.Layout(title="Instagram Reach Time Series and Predictions",
                  xaxis_title="Date",
                  yaxis_title="Instagram Reach")

fig = go.Figure(data=[trace_train, trace_pred], layout=layout)
fig.show()
In [ ]: